# This script prepares HMDA data:
#   1/ read HMDA tract-level data
#   2/ convert older census tract schemes to 2010
#   3/ uses crosswalks to summarize at municipality level 
# Output: a place-year panel of HMDA data (hmda_panel.csv)

rm(list = ls())

library(tidyverse)


############## Specify path to data folder ##############
setwd("~")


############## Read HMDA data ##############

hmda <- read_csv("LARS_CAtracts_1990_2016.csv",
                 col_types = cols(tractid = col_character()))
hmda <- hmda %>% arrange(tractid, year)



############## Read crosswalks ##############

# Read tract regime crosswalks (calculated using NHGIS data)
tr90_10 <- read_csv("cw_tract_90_10.csv")
tr00_10 <- read_csv("cw_tract_00_10.csv")

# Read tract-place crosswalks for 1980 tract regime
tract_80 <- read_csv("tract_place_1980_bypop.csv")
names(tract_80) <- c("county", "tract", "placefp", "statefp", "cnty80nm", "plac80nm", "1990pop", "afact")

tract_80 <- tract_80 %>%
  mutate(tractid = paste(county, tract, sep = "")) %>%
  select(tractid, placefp, plac80nm, afact) %>%
  filter(!is.na(placefp))

# Read tract-place crosswalks for 2010 tract regime
col_names <- names(read_csv("tract_place_2010_bypop.csv", n_max = 0))
tract_10 <- read_csv("tract_place_2010_bypop.csv", col_names = col_names, skip = 2)

tract_10 <- tract_10 %>% 
  mutate(tractid = paste(county, tract, sep = "")) %>%
  select(tractid, placefp, placenm, afact) %>%
  filter(placefp != "99999")



############## Combine with tract-municipality crosswalk ##############

# 1990-1991: 1980 tract scheme - use 1980 tract-to-place crosswalk directly

hmda_80 <- hmda %>% filter(year <= 1991)

hmda_80 <- hmda_80 %>% left_join(tract_80, by = "tractid")

hmda_80 <- hmda_80 %>% filter(!is.na(placefp)) %>%
  group_by(placefp, year) %>%
  summarize(white = sum(white*obs*afact, na.rm = T),
            black = sum(black*obs*afact, na.rm = T),
            hisp = sum(hisp*obs*afact, na.rm = T),
            female = sum(female*obs*afact, na.rm = T),
            occup = sum(occup*obs*afact, na.rm = T),
            income = sum(income*obs*afact, na.rm = T),
            obs = sum(obs*afact)) 

hmda_80 <- hmda_80 %>% 
  mutate(white = white/obs,
         black = black/obs,
         hisp = hisp/obs,
         female = female/obs,
         occup = occup/obs,
         income = income/obs)



# 1992-2002: 1990 tract scheme - convert to 2010 then link to place

hmda_90 <- hmda %>% filter(year >= 1992, year <= 2002)

hmda_90 <- hmda_90 %>% 
  mutate(TRACT90 = str_remove(tractid, "\\.")) %>%
  filter(str_length(TRACT90) == 11)

hmda_90 <- hmda_90 %>% left_join(tr90_10, by = "TRACT90")
hmda_90 <- hmda_90 %>% filter(!is.na(TRACT10))

hmda_90 <- hmda_90 %>% group_by(TRACT10, year) %>%
  summarize(white = sum(white*obs*WEIGHT, na.rm = T),
            black = sum(black*obs*WEIGHT, na.rm = T),
            hisp = sum(hisp*obs*WEIGHT, na.rm = T),
            female = sum(female*obs*WEIGHT, na.rm = T),
            occup = sum(occup*obs*WEIGHT, na.rm = T),
            income = sum(income*obs*WEIGHT, na.rm = T),
            loan_amt = sum(loan_amt*obs*WEIGHT, na.rm = T),
            obs = sum(obs*WEIGHT, na.rm = T)) %>%
  mutate(white = white/obs,
         black = black/obs,
         hisp = hisp/obs,
         female = female/obs,
         occup = occup/obs,
         income = income/obs,
         loan_amt = loan_amt/obs)

hmda_90 <- hmda_90 %>%
  mutate(tractid = paste(substr(TRACT10, 1, 9), ".", substr(TRACT10, 10, 11), sep = "")) %>%
  left_join(tract_10, by = "tractid")

hmda_90 <- hmda_90 %>% filter(!is.na(placefp)) %>% 
  group_by(placefp, year) %>%
  summarize(white = sum(white*obs*afact, na.rm = T),
            black = sum(black*obs*afact, na.rm = T),
            hisp = sum(hisp*obs*afact, na.rm = T),
            female = sum(female*obs*afact, na.rm = T),
            occup = sum(occup*obs*afact, na.rm = T),
            income = sum(income*obs*afact, na.rm = T),
            obs = sum(obs*afact)) %>% 
  mutate(white = white/obs,
         black = black/obs,
         hisp = hisp/obs,
         female = female/obs,
         occup = occup/obs,
         income = income/obs)



# 2003-2011: 2000 tract scheme

hmda_00 <- hmda %>% filter(year >= 2003, year <= 2011)

hmda_00 <- hmda_00 %>% 
  mutate(TRACT00 = str_remove(tractid, "\\."))

hmda_00 <- hmda_00 %>% left_join(tr00_10, by = "TRACT00")

hmda_00 <- hmda_00 %>% filter(!is.na(TRACT10)) %>% 
  group_by(TRACT10, year) %>%
  summarize(white = sum(white*obs*WEIGHT, na.rm = T),
            black = sum(black*obs*WEIGHT, na.rm = T),
            hisp = sum(hisp*obs*WEIGHT, na.rm = T),
            female = sum(female*obs*WEIGHT, na.rm = T),
            occup = sum(occup*obs*WEIGHT, na.rm = T),
            income = sum(income*obs*WEIGHT, na.rm = T),
            loan_amt = sum(loan_amt*obs*WEIGHT, na.rm = T),
            obs = sum(obs*WEIGHT, na.rm = T)) %>%
  mutate(white = white/obs,
         black = black/obs,
         hisp = hisp/obs,
         female = female/obs,
         occup = occup/obs,
         income = income/obs,
         loan_amt = loan_amt/obs)

hmda_00 <- hmda_00 %>%
  mutate(tractid = paste(substr(TRACT10, 1, 9), ".", substr(TRACT10, 10, 11), sep = "")) %>%
  left_join(tract_10, by = "tractid")

hmda_00 <- hmda_00 %>% filter(!is.na(placefp)) %>% 
  group_by(placefp, year) %>%
  summarize(white = sum(white*obs*afact, na.rm = T),
            black = sum(black*obs*afact, na.rm = T),
            hisp = sum(hisp*obs*afact, na.rm = T),
            female = sum(female*obs*afact, na.rm = T),
            occup = sum(occup*obs*afact, na.rm = T),
            income = sum(income*obs*afact, na.rm = T),
            obs = sum(obs*afact)) 

hmda_00 <- hmda_00 %>% 
  mutate(white = white/obs,
         black = black/obs,
         hisp = hisp/obs,
         female = female/obs,
         occup = occup/obs,
         income = income/obs)



# 2012-2016: 2010 tract scheme

hmda_10 <- hmda %>% filter(year >= 2012)

hmda_10 <- hmda_10 %>% left_join(tract_10, by = "tractid")

hmda_10 <- hmda_10 %>% filter(!is.na(placefp)) %>% 
  group_by(placefp, year) %>%
  summarize(white = sum(white*obs*afact, na.rm = T),
            black = sum(black*obs*afact, na.rm = T),
            hisp = sum(hisp*obs*afact, na.rm = T),
            female = sum(female*obs*afact, na.rm = T),
            occup = sum(occup*obs*afact, na.rm = T),
            income = sum(income*obs*afact, na.rm = T),
            obs = sum(obs*afact)) 

hmda_10 <- hmda_10 %>% 
  mutate(white = white/obs,
         black = black/obs,
         hisp = hisp/obs,
         female = female/obs,
         occup = occup/obs,
         income = income/obs)

hmda_tr <- rbind(hmda_80, hmda_90, hmda_00, hmda_10)
write_csv(hmda_tr, "hmda_panel.csv")
